import pickle
%matplotlib inline
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import scipy
from scipy import stats
import h5py
from scipy import stats
from scipy.stats import poisson
from scipy.stats import norm
import pandas
# open the file of interest, and use pickle loading
signal_file = open ("Sample_pt_250_500.pkl",'rb')
sample_dict = pickle.load(signal_file)
# list all keys of the files
sample_dict.keys()
Index(['pt', 'eta', 'phi', 'mass', 'ee2', 'ee3', 'd2', 'angularity', 't1',
't2', 't3', 't21', 't32', 'KtDeltaR'],
dtype='object')
background_file = open ("qcd_100000_pt_250_500.pkl",'rb')
background_dict = pickle.load(background_file)
There are a total of 20100 events comprising of 100 higgs events and 20000 qcd events. So, we can weigh the data.
n_higgs_norm = 100/20100
n_qcd_norm = 20000/20100
Now, I will use these weights to plot the mass distributions.
fig,ax = plt.subplots(1,3,figsize=(20,5))
n_higgs_counts,bins1 = np.histogram(sample_dict['mass'],bins = 100)
n_qcd_counts,bins2 = np.histogram(background_dict['mass'],bins = 100)
ax[0].hist(bins2[:-1], bins2, weights=n_qcd_counts*n_qcd_norm,label="Background")
ax[0].hist(bins1[:-1], bins1, weights=n_higgs_counts*n_higgs_norm,label="Signal")
ax[0].set_xlim([10,300])
ax[0].set_title('Particle Counts for each Mass')
ax[0].set_xlabel('Mass')
ax[0].set_ylabel('Counts')
ax[0].legend(loc='best')
ax[1].hist(bins2[:-1], bins2, weights=n_qcd_counts*n_qcd_norm,density=True,label="Background")
ax[1].hist(bins1[:-1], bins1, weights=n_higgs_counts*n_higgs_norm,density=True,label="Signal")
ax[1].legend(loc='best')
ax[1].set_title('Probability of each Mass')
ax[1].set_xlabel('Mass')
ax[1].set_ylabel('Probability')
ax[2].hist(bins2[:-1], bins2, weights=n_qcd_counts*n_qcd_norm,label="Background")
ax[2].hist(bins1[:-1], bins1, weights=n_higgs_counts*n_higgs_norm,label="Signal")
ax[2].set_yscale('log')
ax[2].legend(loc='best')
ax[2].set_title('Probability of each Mass')
ax[2].set_xlabel('Mass')
ax[2].set_ylabel('Probability (log Scale)')
plt.show()
The plot above shows the whole data set with the higgs data shown at ~115.
Now I will calculate the significance.
n_higgs = 100
n_qcd = 20000
sigma = poisson.cdf(n_higgs + n_qcd,n_qcd)
probability = norm.isf(1-sigma)
probability
0.7112259282313185
Now let's see how this approximation compares to the data.
approx = n_higgs/np.sqrt(n_qcd)
approx
0.7071067811865475
not equivalent, but pretty close. Although in particle physics, this approximation is not good enough. This approximation is a gaussian statistic and gives us a good idea about what the probability value is.
Now, I will make some mass cuts to find the the best significance
n_higgs_norm = 100/20100
n_qcd_norm = 20000/20100
def cut(sample_data,background_data,higgs_low_bound,higgs_high_bound,bkd_low_bound,bkd_high_bound,feature):
cut_mass_higgs = sample_data[sample_data[feature]>higgs_low_bound]
cut_mass_qcd = background_data[background_data[feature]>bkd_low_bound]
cut_mass_higgs = cut_mass_higgs[cut_mass_higgs[feature]<higgs_high_bound]
cut_mass_qcd = cut_mass_qcd[cut_mass_qcd[feature]<bkd_high_bound]
fig,ax = plt.subplots(1,3,figsize=(20,5))
n_higgs_counts,bins1 = np.histogram(cut_mass_higgs[feature],bins = 100)
n_qcd_counts,bins2 = np.histogram(cut_mass_qcd[feature],bins = 100)
ax[0].hist(bins2[:-1], bins2, weights=n_qcd_counts*n_qcd_norm,label="Background")
ax[0].hist(bins1[:-1], bins1, weights=n_higgs_counts*n_higgs_norm,label="Signal")
ax[0].set_title('Particle Counts for each Mass')
ax[0].set_xlabel('Mass')
ax[0].set_ylabel('Counts')
ax[0].legend(loc='best')
ax[1].hist(bins2[:-1], bins2, weights=n_qcd_counts*n_qcd_norm,density=True,label="Background")
ax[1].hist(bins1[:-1], bins1, weights=n_higgs_counts*n_higgs_norm,density=True,label="Signal")
ax[1].legend(loc='best')
ax[1].set_title('Probability of each Mass')
ax[1].set_xlabel('Mass')
ax[1].set_ylabel('Probability')
ax[2].hist(bins2[:-1], bins2, weights=n_qcd_counts*n_qcd_norm,label="Background")
ax[2].hist(bins1[:-1], bins1, weights=n_higgs_counts*n_higgs_norm,label="Signal")
ax[2].set_yscale('log')
ax[2].legend(loc='best')
ax[2].set_title('Probability of each Mass')
ax[2].set_xlabel('Mass')
ax[2].set_ylabel('Probability (log Scale)')
plt.show()
return cut_mass_higgs, cut_mass_qcd
cut_mass_higgs, cut_mass_qcd = cut(sample_dict,background_dict,100,150,100,150,'mass')
def signif(cut_mass_higgs,cut_mass_qcd,feature):
n_higgs = 100
n_qcd = 20000
higgs = (sum(cut_mass_higgs[feature]) / sum(sample_dict[feature])) * n_higgs
qcd = (sum(cut_mass_qcd[feature]) / sum(background_dict[feature])) * n_qcd
significance = higgs / np.sqrt(qcd)
print(significance)
signif(cut_mass_higgs,cut_mass_qcd,'mass')
1.1556144516115936
Need to find cuts to increase this number
cut_mass_higgs, cut_mass_qcd = cut(sample_dict,background_dict,110,140,110,140,'mass')
signif(cut_mass_higgs,cut_mass_qcd,'mass')
1.464999846258688
cut_mass_higgs, cut_mass_qcd = cut(sample_dict,background_dict,120,130,120,130,'mass')
signif(cut_mass_higgs,cut_mass_qcd,'mass')
2.3470782626118902
cut_mass_higgs, cut_mass_qcd = cut(sample_dict,background_dict,124.5,127,124.5,127,'mass')
signif(cut_mass_higgs,cut_mass_qcd,'mass')
3.1007590939315057
cut_mass_higgs, cut_mass_qcd = cut(sample_dict,background_dict,124.55,127,124.55,127,'mass')
signif(cut_mass_higgs,cut_mass_qcd,'mass')
3.1065303151335293
We see that the optimal mass cuts are at 124.55 and 127 so we can use this interval on the rest of the parameters to get the highest significance value possible.
Set A: First, I will plot the rest of the features to identify the most discriminative parameters and the cut intervals.
def hist(feature,sample_data,background_data):
fig,ax = plt.subplots(1,3,figsize=(20,5))
n_higgs_counts,bins1 = np.histogram(sample_data[feature],bins = 100)
n_qcd_counts,bins2 = np.histogram(background_data[feature],bins = 100)
ax[0].hist(bins2[:-1], bins2, weights=n_qcd_counts*n_qcd_norm,label="Background")
ax[0].hist(bins1[:-1], bins1, weights=n_higgs_counts*n_higgs_norm,label="Higgs Signal")
ax[0].set_title(feature)
ax[0].set_title('Particle Counts for each ' + feature)
ax[0].set_xlabel(feature)
ax[0].set_ylabel('Counts')
ax[0].legend(loc='best')
ax[1].hist(bins2[:-1], bins2, weights=n_qcd_counts*n_qcd_norm,density=True,label="Background")
ax[1].hist(bins1[:-1], bins1, weights=n_higgs_counts*n_higgs_norm,density=True,label="Higgs Signal")
ax[1].set_title(feature)
ax[1].legend(loc='best')
ax[1].set_xlabel(feature)
ax[1].set_ylabel('Probability')
ax[1].set_title('Probability of each ' + feature + ' value')
ax[2].hist(bins2[:-1], bins2, weights=n_qcd_counts*n_qcd_norm,label="Background")
ax[2].hist(bins1[:-1], bins1, weights=n_higgs_counts*n_higgs_norm,label="Higgs Signal")
ax[2].set_yscale('log')
ax[2].set_ylabel('Probability (log scale)')
ax[2].legend(loc='best')
ax[2].set_title('Probability of each ' + feature + ' value')
ax[2].set_xlabel(feature)
plt.show()
#WITHOUT EVENT SELECTION
parameters = ['pt','eta','phi','ee2','ee3','d2','angularity','t1','t2','t3','t21','t32','KtDeltaR']
for i in parameters:
hist(i,sample_dict,background_dict)
Looking at the other parameters, it is definately possible to make cuts to improve the significance. First lets try the transverse momentum
Set B:
#WITH OPTIMIZED MASS CUTS
for i in parameters:
hist(i,cut_mass_higgs,cut_mass_qcd)
It is possible to make cuts on ee2, ee3, d2, angularity, t2, t3, t21, ktDeltaR. First, I will make cuts on t21 as it is the most discrimative.
n_higgs_norm = 100/20100
n_qcd_norm = 20000/20100
def cut1(sample_data,background_data,higgs_low_bound,higgs_high_bound,bkd_low_bound,bkd_high_bound,feature):
cut_higgs = sample_data[sample_data[feature]>higgs_low_bound] #low bound
cut_qcd = background_data[background_data[feature]>bkd_low_bound]
cut_higgs = cut_higgs[cut_higgs[feature]<higgs_high_bound] #high bound
cut_qcd = cut_qcd[cut_qcd[feature]<bkd_high_bound]
fig,ax = plt.subplots(1,3,figsize=(20,5))
n_higgs_counts,bins1 = np.histogram(cut_higgs[feature],bins = 100)
n_qcd_counts,bins2 = np.histogram(cut_qcd[feature],bins = 100)
ax[0].hist(bins2[:-1], bins2, weights=n_qcd_counts*n_qcd_norm,label="Background")
ax[0].hist(bins1[:-1], bins1, weights=n_higgs_counts*n_higgs_norm,label="Higgs Signal")
ax[0].set_title(feature)
ax[0].set_title('Particle Counts for each ' + feature)
ax[0].set_xlabel(feature)
ax[0].set_ylabel('Counts')
ax[0].legend(loc='best')
ax[1].hist(bins2[:-1], bins2, weights=n_qcd_counts*n_qcd_norm,density=True,label="Background")
ax[1].hist(bins1[:-1], bins1, weights=n_higgs_counts*n_higgs_norm,density=True,label="Higgs Signal")
ax[1].set_title(feature)
ax[1].legend(loc='best')
ax[1].set_xlabel(feature)
ax[1].set_ylabel('Probability')
ax[1].set_title('Probability of each ' + feature + ' value')
ax[2].hist(bins2[:-1], bins2, weights=n_qcd_counts*n_qcd_norm,label="Background")
ax[2].hist(bins1[:-1], bins1, weights=n_higgs_counts*n_higgs_norm,label="Higgs Signal")
ax[2].set_yscale('log')
ax[2].set_ylabel('Probability (log scale)')
ax[2].legend(loc='best')
ax[2].set_title('Probability of each ' + feature + ' value')
ax[2].set_xlabel(feature)
plt.show()
return cut_higgs, cut_qcd
def signif1(sample_data,background_data,feature):
n_higgs = 100
n_qcd = 20000
higgs = (sum(sample_data[feature]) / sum(sample_dict[feature])) * n_higgs
qcd = (sum(background_data[feature]) / sum(background_dict[feature])) * n_qcd
significance = higgs / np.sqrt(qcd)
print("Expected Significance: " + str(significance))
cut_t21_higgs, cut_t21_qcd = cut1(cut_mass_higgs,cut_mass_qcd,0,0.50,0,0.50,'t21')
signif1(cut_t21_higgs,cut_t21_qcd,'t21')
Expected Significance: 4.584010789439429
Next, I will make cuts on t3
cut_t3_higgs, cut_t3_qcd = cut1(cut_t21_higgs,cut_t21_qcd,0,0.40,0,0.40,'t3')
signif1(cut_t3_higgs,cut_t3_qcd,'t3')
Expected Significance: 4.8262036738161065
Then t2
cut_t2_higgs, cut_t2_qcd = cut1(cut_t3_higgs,cut_t3_qcd,0,0.40,0,0.40,'t2')
signif1(cut_t2_higgs,cut_t2_qcd,'t2')
Expected Significance: 5.023933141212832
Now, KtDeltaR
cut_KtDeltaR_higgs, cut_KtDeltaR_qcd = cut1(cut_t2_higgs,cut_t2_qcd,0.45,0.80,0.45,0.80,'KtDeltaR')
signif1(cut_KtDeltaR_higgs,cut_KtDeltaR_qcd,'KtDeltaR')
Expected Significance: 5.190611027920224
next, angularity
cut_angle_higgs, cut_angle_qcd = cut1(cut_KtDeltaR_higgs,cut_KtDeltaR_qcd,0,0.01,0,0.01,'angularity')
signif1(cut_angle_higgs,cut_angle_qcd,'angularity')
Expected Significance: 5.60350093514365
next, d2
cut_d2_higgs, cut_d2_qcd = cut1(cut_angle_higgs,cut_angle_qcd,0,1.5,0,1.5,'d2')
signif1(cut_d2_higgs,cut_d2_qcd,'d2')
Expected Significance: 8.587508094837364
This is very good significance. Let's now check the pT plot before and after event selection and compare
#after event selection
hist('pt',cut_d2_higgs,cut_d2_qcd)
#before event selection
hist('pt',sample_dict,background_dict)
We can see that the event selection was successful because much of the background has been eliminated while maintainting a good amount of the higgs data. Now, let's compare the significance for the pT data before/after data selection.
before = signif1(sample_dict,background_dict,'pt')
after = signif1(cut_d2_higgs,cut_d2_qcd,'pt')
Expected Significance: 0.7071067811865475 Expected Significance: 9.94032437221929
We see that the cuts made improved the significance.
highlumi = pandas.read_hdf('data_highLumi_pt_250_500.h5')
highlumi.keys()
Index(['pt', 'eta', 'phi', 'mass', 'ee2', 'ee3', 'd2', 'angularity', 't1',
't2', 't3', 't21', 't32', 'KtDeltaR'],
dtype='object')
First, we add the high luminosity data to the data done in lab 7 by overlaying the observed data onto the training data.
n_higgs_norm = 100/20100
n_qcd_norm = 20000/20100
def plot_lumi(sample_data,background_data,observed_data,feature,low_bound,high_bound):
fig,ax = plt.subplots(1,3,figsize=(20,5))
n_obs_norm = sum(observed_data[feature])/sum(observed_data[feature])
n_higgs_counts,bins1 = np.histogram(sample_data[feature],bins = 100)
n_qcd_counts,bins2 = np.histogram(background_data[feature],bins = 100)
obs_counts,bins3 = np.histogram(observed_data[feature],bins=100)
ax[0].hist(bins2[:-1], bins2, weights=n_qcd_counts*n_qcd_norm,label="Background",histtype='step')
ax[0].hist(bins1[:-1], bins1, weights=n_higgs_counts*n_higgs_norm,label="Signal",histtype='step')
ax[0].hist(bins3[:-1], bins3, weights=obs_counts*n_obs_norm,color='red',label="Observed Data",histtype='step')
ax[0].legend(loc='best')
ax[0].set_title('Particle Counts for each ' + feature)
ax[0].set_xlabel(feature)
ax[0].set_ylabel('Counts')
ax[1].hist(bins2[:-1], bins2, weights=n_qcd_counts*n_qcd_norm,density=True,label="Background",histtype='step')
ax[1].hist(bins1[:-1], bins1, weights=n_higgs_counts*n_higgs_norm,density=True,label="Signal",histtype='step')
ax[1].hist(bins3[:-1], bins3, weights=obs_counts*n_obs_norm,density=True,color='red',label="Observed Data",histtype='step')
ax[1].legend(loc='best')
ax[1].set_xlabel(feature)
ax[1].set_ylabel('Probability')
ax[1].set_title('Probability of each ' + feature + ' value')
ax[2].hist(bins2[:-1], bins2, weights=n_qcd_counts*n_qcd_norm,label="Background",histtype='step')
ax[2].hist(bins1[:-1], bins1, weights=n_higgs_counts*n_higgs_norm,label="Signal",histtype='step')
ax[2].hist(bins3[:-1], bins3, weights=obs_counts*n_obs_norm,color='red',label="Observed Data",histtype='step')
ax[2].set_yscale('log')
ax[2].legend(loc='best')
ax[2].set_ylabel('Probability (log scale)')
ax[2].set_title('Probability of each ' + feature + ' value')
ax[2].set_xlabel(feature)
plt.show()
n_higgs_norm = 100/20100
n_qcd_norm = 20000/20100
def cut_lumi(sample_data,background_data,observed_data,feature,low_bound,high_bound):
fig,ax = plt.subplots(1,3,figsize=(20,5))
cut_lumi = observed_data[observed_data[feature]>low_bound]
cut_lumi = cut_lumi[cut_lumi[feature]<high_bound]
n_obs_norm = sum(observed_data[feature])/sum(observed_data[feature])
cut_higgs = sample_data[sample_data[feature]>low_bound]
cut_qcd = background_data[background_data[feature]>low_bound]
cut_higgs = cut_higgs[cut_higgs[feature]<high_bound]
cut_qcd = cut_qcd[cut_qcd[feature]<high_bound]
n_higgs_counts,bins1 = np.histogram(cut_higgs[feature],bins = 100)
n_qcd_counts,bins2 = np.histogram(cut_qcd[feature],bins = 100)
obs_counts,bins3 = np.histogram(cut_lumi[feature],bins=100)
ax[0].hist(bins2[:-1], bins2, weights=n_qcd_counts*n_qcd_norm,label="Background",histtype='step')
ax[0].hist(bins1[:-1], bins1, weights=n_higgs_counts*n_higgs_norm,label="Signal",histtype='step')
ax[0].hist(bins3[:-1], bins3, weights=obs_counts*n_obs_norm,color='red',label="Observed Data",histtype='step')
ax[0].legend(loc='best')
ax[0].set_title('Particle Counts for each ' + feature)
ax[0].set_xlabel(feature)
ax[0].set_ylabel('Counts')
ax[1].hist(bins2[:-1], bins2, weights=n_qcd_counts*n_qcd_norm,density=True,label="Background",histtype='step')
ax[1].hist(bins1[:-1], bins1, weights=n_higgs_counts*n_higgs_norm,density=True,label="Signal",histtype='step')
ax[1].hist(bins3[:-1], bins3, weights=obs_counts*n_obs_norm,density=True,color='red',label="Observed Data",histtype='step')
ax[1].legend(loc='best')
ax[1].set_xlabel(feature)
ax[1].set_ylabel('Probability')
ax[1].set_title('Probability of each ' + feature + ' value')
ax[2].hist(bins2[:-1], bins2, weights=n_qcd_counts*n_qcd_norm,label="Background",histtype='step')
ax[2].hist(bins1[:-1], bins1, weights=n_higgs_counts*n_higgs_norm,label="Signal",histtype='step')
ax[2].hist(bins3[:-1], bins3, weights=obs_counts*n_obs_norm,color='red',label="Observed Data",histtype='step')
ax[2].legend(loc='best')
ax[2].set_yscale('log')
ax[2].set_ylabel('Probability (log scale)')
ax[2].set_title('Probability of each ' + feature + ' value')
ax[2].set_xlabel(feature)
plt.show()
return cut_higgs, cut_qcd, cut_lumi
def lumi_sig(cut_higgs,cut_qcd,cut_lumi,feature,lumi):
n_higgs = 100
n_qcd = 20000
higgs = (sum(cut_higgs[feature]) / sum(sample_dict[feature])) * n_higgs
lumi = (sum(cut_lumi[feature]) / sum(lumi[feature])) * n_qcd
significance = higgs / np.sqrt(lumi)
print("Observed Significance: " + str(significance))
I will overlap the observed data on the sample data with optimal cuts and compare the significance.
plot_lumi(sample_dict,background_dict,highlumi,'mass',0,500)
cut_mass_higgs, cut_mass_qcd, cut_mass_highlumi = cut_lumi(sample_dict,background_dict,highlumi,'mass',124.55,127)
signif1(cut_mass_higgs,cut_mass_qcd,'mass')
lumi_sig(cut_mass_higgs,cut_mass_qcd, cut_mass_highlumi,'mass',highlumi)
Expected Significance: 3.1065303151335293 Observed Significance: 2.7934212597565553
Next, I will do this with the same parameters used in the lab 7 to improve the significance and compare with the significance used observed in lab 7.
plot_lumi(cut_mass_higgs,cut_mass_qcd,cut_mass_highlumi,'t21',0,500)
cut_t21_higgs, cut_t21_qcd, cut_t21_highlumi = cut_lumi(cut_mass_higgs,cut_mass_qcd,cut_mass_highlumi,'t21',0,0.50)
signif1(cut_t21_higgs,cut_t21_qcd,'t21')
lumi_sig(cut_t21_higgs,cut_t21_qcd, cut_t21_highlumi,'t21',highlumi)
Expected Significance: 4.584010789439429 Observed Significance: 3.9375365846257573
#t3
plot_lumi(cut_t21_higgs,cut_t21_qcd,cut_t21_highlumi,'t3',0,500)
cut_t3_higgs, cut_t3_qcd, cut_t3_highlumi = cut_lumi(cut_t21_higgs,cut_t21_qcd,cut_t21_highlumi,'t3',0,0.40)
signif1(cut_t3_higgs,cut_t3_qcd,'t3')
lumi_sig(cut_t3_higgs,cut_t3_qcd, cut_t3_highlumi,'t3',highlumi)
Expected Significance: 4.8262036738161065 Observed Significance: 4.106255681013995
#t2
plot_lumi(cut_t3_higgs,cut_t3_qcd,cut_t3_highlumi,'t2',0,500)
cut_t2_higgs, cut_t2_qcd, cut_t2_highlumi = cut_lumi(cut_t3_higgs,cut_t3_qcd,cut_t3_highlumi,'t2',0,0.40)
signif1(cut_t2_higgs,cut_t2_qcd,'t2')
lumi_sig(cut_t2_higgs,cut_t2_qcd, cut_t2_highlumi,'t2',highlumi)
Expected Significance: 5.023933141212832 Observed Significance: 4.081977570330204
In this case, the significance actually decreases...
#KtDeltaR
plot_lumi(cut_t2_higgs,cut_t2_qcd,cut_t2_highlumi,'KtDeltaR',0,500)
cut_KtDeltaR_higgs, cut_KtDeltaR_qcd, cut_KtDeltaR_highlumi = cut_lumi(cut_t2_higgs,cut_t2_qcd,cut_t2_highlumi,'KtDeltaR',0.45,0.80)
signif1(cut_KtDeltaR_higgs,cut_KtDeltaR_qcd,'KtDeltaR')
lumi_sig(cut_KtDeltaR_higgs,cut_KtDeltaR_qcd, cut_KtDeltaR_highlumi,'KtDeltaR',highlumi)
Expected Significance: 5.190611027920224 Observed Significance: 3.6663602914134
Significance decreases again.
#angularity
plot_lumi(cut_KtDeltaR_higgs,cut_KtDeltaR_qcd,cut_KtDeltaR_highlumi,'angularity',0,500)
cut_angle_higgs, cut_angle_qcd, cut_angle_highlumi = cut_lumi(cut_KtDeltaR_higgs,cut_KtDeltaR_qcd,cut_KtDeltaR_highlumi,'angularity',0,0.01)
signif1(cut_angle_higgs,cut_angle_qcd,'angularity')
lumi_sig(cut_angle_higgs,cut_angle_qcd, cut_angle_highlumi,'angularity',highlumi)
Expected Significance: 5.60350093514365 Observed Significance: 4.0432623413724125
Observed significance increases again
#d2
plot_lumi(cut_angle_higgs,cut_angle_qcd,cut_angle_highlumi,'d2',0,500)
cut_d2_higgs, cut_d2_qcd, cut_d2_highlumi = cut_lumi(cut_angle_higgs,cut_angle_qcd,cut_angle_highlumi,'d2',0,1.5)
signif1(cut_d2_higgs,cut_d2_qcd,'d2')
lumi_sig(cut_d2_higgs,cut_d2_qcd, cut_d2_highlumi,'d2',highlumi)
Expected Significance: 8.587508094837364 Observed Significance: 5.275155863120274
lowlumi = pandas.read_hdf('data_lowLumi_pt_250_500.h5')
lowlumi.keys()
Index(['pt', 'eta', 'phi', 'mass', 'ee2', 'ee3', 'd2', 'angularity', 't1',
't2', 't3', 't21', 't32', 'KtDeltaR'],
dtype='object')
plot_lumi(sample_dict,background_dict,lowlumi,'mass',0,500)
cut_mass_higgs, cut_mass_qcd, cut_mass_lowlumi = cut_lumi(sample_dict,background_dict,lowlumi,'mass',124.55,127)
signif1(cut_mass_higgs,cut_mass_qcd,'mass')
lumi_sig(cut_mass_higgs,cut_mass_qcd, cut_mass_lowlumi,'mass',lowlumi)
Expected Significance: 3.1065303151335293 Observed Significance: 2.755094994765747
#t21
plot_lumi(cut_mass_higgs,cut_mass_qcd,cut_mass_lowlumi,'t21',0,500)
cut_t21_higgs, cut_t21_qcd, cut_t21_lowlumi = cut_lumi(cut_mass_higgs,cut_mass_qcd,cut_mass_lowlumi,'t21',0,0.50)
signif1(cut_t21_higgs,cut_t21_qcd,'t21')
lumi_sig(cut_t21_higgs,cut_t21_qcd, cut_t21_lowlumi,'t21',lowlumi)
Expected Significance: 4.584010789439429 Observed Significance: 3.8647846086183946
#t3
plot_lumi(cut_t21_higgs,cut_t21_qcd,cut_t21_lowlumi,'t3',0,500)
cut_t3_higgs, cut_t3_qcd, cut_t3_lowlumi = cut_lumi(cut_t21_higgs,cut_t21_qcd,cut_t21_lowlumi,'t3',0,0.40)
signif1(cut_t3_higgs,cut_t3_qcd,'t3')
lumi_sig(cut_t3_higgs,cut_t3_qcd, cut_t3_lowlumi,'t3',lowlumi)
Expected Significance: 4.8262036738161065 Observed Significance: 4.083910892857353
#t2
plot_lumi(cut_t3_higgs,cut_t3_qcd,cut_t3_lowlumi,'t2',0,500)
cut_t2_higgs, cut_t2_qcd, cut_t2_lowlumi = cut_lumi(cut_t3_higgs,cut_t3_qcd,cut_t3_lowlumi,'t2',0,0.40)
signif1(cut_t2_higgs,cut_t2_qcd,'t2')
lumi_sig(cut_t2_higgs,cut_t2_qcd, cut_t2_lowlumi,'t2',lowlumi)
Expected Significance: 5.023933141212832 Observed Significance: 3.87734437977314
#KtDeltaR
plot_lumi(cut_t2_higgs,cut_t2_qcd,cut_t2_lowlumi,'KtDeltaR',0,500)
cut_KtDeltaR_higgs, cut_KtDeltaR_qcd, cut_KtDeltaR_lowlumi = cut_lumi(cut_t2_higgs,cut_t2_qcd,cut_t2_lowlumi,'KtDeltaR',0.45,0.80)
signif1(cut_KtDeltaR_higgs,cut_KtDeltaR_qcd,'KtDeltaR')
lumi_sig(cut_KtDeltaR_higgs,cut_KtDeltaR_qcd, cut_KtDeltaR_lowlumi,'KtDeltaR',lowlumi)
Expected Significance: 5.190611027920224 Observed Significance: 3.7460446983586144
#angularity
plot_lumi(cut_KtDeltaR_higgs,cut_KtDeltaR_qcd,cut_KtDeltaR_lowlumi,'angularity',0,500)
cut_angle_higgs, cut_angle_qcd, cut_angle_lowlumi = cut_lumi(cut_KtDeltaR_higgs,cut_KtDeltaR_qcd,cut_KtDeltaR_lowlumi,'angularity',0,0.01)
signif1(cut_angle_higgs,cut_angle_qcd,'angularity')
lumi_sig(cut_angle_higgs,cut_angle_qcd, cut_angle_lowlumi,'angularity',lowlumi)
Expected Significance: 5.60350093514365 Observed Significance: 4.123136721441791
#d2
plot_lumi(cut_angle_higgs,cut_angle_qcd,cut_angle_lowlumi,'d2',0,500)
cut_d2_higgs, cut_d2_qcd, cut_d2_lowlumi = cut_lumi(cut_angle_higgs,cut_angle_qcd,cut_angle_lowlumi,'d2',0,1.5)
signif1(cut_d2_higgs,cut_d2_qcd,'d2')
lumi_sig(cut_d2_higgs,cut_d2_qcd, cut_d2_lowlumi,'d2',lowlumi)
Expected Significance: 8.587508094837364 Observed Significance: 5.117287951674127
The confidence interval is given by the expression
$CI = \bar X \pm t*(std)/\sqrt n$
where $\bar X$ is the data mean, t is the confidence level, std is the standard deviation of the data, and n is the data size
def CI(data):
return scipy.stats.t.interval(alpha=0.95, df=len(data)-1, loc=np.mean(data), scale=scipy.stats.sem(data))
interval = CI(sample_dict['mass'])
print("Expected Higgs 95% upper bound: " + str(interval[1]))
Expected Higgs 95% upper bound: 115.04951322065835
interval = CI(background_dict['mass'])
print("Expected QCD 95% upper bound: " + str(interval[1]))
Expected QCD 95% upper bound: 98.0076111970423
interval = CI(highlumi['mass'])
print("Observed 95% upper bound: " + str(interval[1]))
Observed 95% upper bound: 98.2009933747069
We see that the highest upper bound is in the expected higgs signal and the lowest upper bound is from the expected QCD signal.